/*
 *  arch/ppc64/kernel/head.S
 *
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
 *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
 *  Adapted for Power Macintosh by Paul Mackerras.
 *  Low-level exception handlers and MMU support
 *  rewritten by Paul Mackerras.
 *    Copyright (C) 1996 Paul Mackerras.
 *
 *  Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
 *    Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
 *
 *  This file contains the low-level support and setup for the
 *  PowerPC-64 platform, including trap and interrupt dispatch.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */

#define SECONDARY_PROCESSORS

#include <linux/config.h>
#include <linux/threads.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/naca.h>
#include <asm/systemcfg.h>
#include <asm/ppc_asm.h>
#include <asm/offsets.h>
#include <asm/bug.h>
#include <asm/cputable.h>
#include <asm/setup.h>

#ifdef CONFIG_PPC_ISERIES
#define DO_SOFT_DISABLE
#endif

/*
 * hcall interface to pSeries LPAR
 */
#define HVSC		.long 0x44000022
#define H_SET_ASR	0x30

/*
 * We layout physical memory as follows:
 * 0x0000 - 0x00ff : Secondary processor spin code
 * 0x0100 - 0x2fff : pSeries Interrupt prologs
 * 0x3000 - 0x3fff : Interrupt support
 * 0x4000 - 0x4fff : NACA
 * 0x5000 - 0x5fff : SystemCfg
 * 0x6000	   : iSeries and common interrupt prologs
 * 0x9000 - 0x9fff : Initial segment table
 */

/*
 *   SPRG Usage
 *
 *   Register	Definition
 *
 *   SPRG0	reserved for hypervisor
 *   SPRG1	temp - used to save gpr
 *   SPRG2	temp - used to save gpr
 *   SPRG3	virt addr of paca
 */

/*
 * Entering into this code we make the following assumptions:
 *  For pSeries:
 *   1. The MMU is off & open firmware is running in real mode.
 *   2. The kernel is entered at __start
 *
 *  For iSeries:
 *   1. The MMU is on (as it always is for iSeries)
 *   2. The kernel is entered at system_reset_iSeries
 */

	/*
	 * Start of the kernel image.  _stext labels the first byte of the
	 * text section.  On multiplatform builds __start is defined at the
	 * same address and immediately branches to the multiplatform
	 * initialization code.
	 */
	.text
	.globl  _stext
_stext:
#ifdef CONFIG_PPC_MULTIPLATFORM
_GLOBAL(__start)
	/*
	 * NOP this out unconditionally: the branch is wrapped in a feature
	 * section declared with END_FTR_SECTION(0, 1).
	 * NOTE(review): the precise mask/value semantics come from the
	 * feature-fixup macros, which are defined outside this file.
	 */
BEGIN_FTR_SECTION
	b .__start_initialization_multiplatform
END_FTR_SECTION(0, 1)
#endif /* CONFIG_PPC_MULTIPLATFORM */

	/* Catch branch to 0 in real mode */
	trap
/*
 * Fixed-offset words that follow the entry point.
 *  - iSeries: pointers/offsets at 0x20..0x40 (see the per-field comments).
 *  - otherwise: the two mailbox words used by the secondary-cpu hold loop,
 *    followed by the hold loop itself at 0x60.
 */
#ifdef CONFIG_PPC_ISERIES
	/*
	 * At offset 0x20, there is a pointer to iSeries LPAR data.
	 * This is required by the hypervisor
	 */
	. = 0x20
	.llong hvReleaseData-KERNELBASE

	/*
	 * At offset 0x28 and 0x30 are offsets to the msChunks
	 * array (used by the iSeries LPAR debugger to do translation
	 * between physical addresses and absolute addresses) and
	 * to the pidhash table (also used by the debugger)
	 */
	.llong msChunks-KERNELBASE
	.llong 0	/* pidhash-KERNELBASE SFRXXX */

	/* Offset 0x38 - Pointer to start of embedded System.map */
	.globl	embedded_sysmap_start
embedded_sysmap_start:
	.llong	0
	/* Offset 0x40 - Pointer to end of embedded System.map */
	.globl	embedded_sysmap_end
embedded_sysmap_end:
	.llong	0

#else /* CONFIG_PPC_ISERIES */

	/* Secondary processors spin on this value until it goes to 1. */
	.globl  __secondary_hold_spinloop
__secondary_hold_spinloop:
	.llong	0x0

	/* Secondary processors write this value with their cpu # */
	/* after they enter the spin loop immediately below.	  */
	.globl	__secondary_hold_acknowledge
__secondary_hold_acknowledge:
	.llong	0x0

	. = 0x60
/*
 * The following code is used on pSeries to hold secondary processors
 * in a spin loop after they have been freed from OpenFirmware, but
 * before the bulk of the kernel has been relocated.  This code
 * is relocated to physical address 0x60 before prom_init is run.
 * All of it must fit below the first exception vector at 0x100.
 *
 * On entry r3 holds this cpu's number; it is kept in r24 while
 * spinning and handed back in r3 to the next stage.
 */
_GLOBAL(__secondary_hold)
	mfmsr	r24
	ori	r24,r24,MSR_RI
	mtmsrd	r24			/* RI on */

	/* Grab our linux cpu number */
	mr	r24,r3

	/* Tell the master cpu we're here */
	/* Relocation is off & we are located at an address less */
	/* than 0x100, so only need to grab low order offset.    */
	std	r24,__secondary_hold_acknowledge@l(0)
	sync

	/* All secondary cpu's wait here until told to start. */
100:	ld	r4,__secondary_hold_spinloop@l(0)
	cmpdi	0,r4,1			/* released once the word reads 1 */
	bne	100b

#ifdef CONFIG_HMT
	b	.hmt_init
#else
#ifdef CONFIG_SMP
	mr	r3,r24			/* pass the cpu number on in r3 */
	b	.pSeries_secondary_smp_init
#else
	/* No SMP support built in: a released secondary has nowhere to go */
	BUG_OPCODE
#endif
#endif
#endif

/*
 * This value is used to mark exception frames on the stack.
 * The 64-bit constant spells the ASCII string "regshere".  It lives in
 * the TOC so that it can be loaded relative to r2 (exception_marker@toc).
 */
	.section ".toc","aw"
exception_marker:
	.tc	ID_72656773_68657265[TC],0x7265677368657265
	.text

/*
 * The following macros define the code that appears as
 * the prologue to each of the exception handlers.  They
 * are split into two parts to allow a single kernel binary
 * to be used for pSeries and iSeries.
 * LOL.  One day... - paulus
 */

/*
 * We make as much of the exception code common between native
 * exception handlers (including pSeries LPAR) and iSeries LPAR
 * implementations as possible.
 */

/*
 * This is the start of the interrupt handlers for pSeries
 * This code runs with relocation off.
 */
/*
 * Byte offsets of the slots inside one exception save area of the paca
 * (the "area" argument of the prolog macros: PACA_EXGEN, PACA_EXMC or
 * PACA_EXSLB).  Two offsets are shared between different uses:
 * 40 holds either SRR0 or r3, and 48 holds either DAR or LR.
 * EX_DSISR and EX_CCR are 4 bytes apart and are accessed with
 * word-sized loads/stores (stw/lwz) elsewhere in this file.
 */
#define EX_R9		0
#define EX_R10		8
#define EX_R11		16
#define EX_R12		24
#define EX_R13		32
#define EX_SRR0		40
#define EX_R3		40	/* SLB miss saves R3, but not SRR0 */
#define EX_DAR		48
#define EX_LR		48	/* SLB miss saves LR, but not DAR */
#define EX_DSISR	56
#define EX_CCR		60
/*
 * EXCEPTION_PROLOG_PSERIES(area, label)
 *
 * First-level pSeries prolog.  The caller must already have copied the
 * interrupted r13 into SPRG1.
 *
 *  - r13 is loaded with the paca address from SPRG3.
 *  - r9-r12 and the interrupted r13 (read back from SPRG1) are stored
 *    in the paca save area "area".
 *  - The handler address is built from the upper 32 bits of the paca
 *    address OR'd with the low 16 bits of "label"; bits 16-31 are left
 *    zero, so "label" has to lie in the first 64KB of that region.
 *  - SRR1 is set to the current MSR with IR, DR and RI turned on and
 *    rfid transfers to the handler.
 *
 * At "label": r9 = interrupted CR, r10 = the MSR value written to SRR1,
 * r11 = interrupted SRR0, r12 = interrupted SRR1, r13 = paca.
 */
#define EXCEPTION_PROLOG_PSERIES(area, label)				\
	mfspr	r13,SPRG3;		/* get paca address into r13 */	\
	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
	std	r10,area+EX_R10(r13);					\
	std	r11,area+EX_R11(r13);					\
	std	r12,area+EX_R12(r13);					\
	mfspr	r9,SPRG1;						\
	std	r9,area+EX_R13(r13);					\
	mfcr	r9;							\
	clrrdi	r12,r13,32;		/* get high part of &label */	\
	mfmsr	r10;							\
	mfspr	r11,SRR0;		/* save SRR0 */			\
	ori	r12,r12,(label)@l;	/* virt addr of handler */	\
	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI;				\
	mtspr	SRR0,r12;						\
	mfspr	r12,SRR1;		/* and SRR1 */			\
	mtspr	SRR1,r10;						\
	rfid;								\
	b	.	/* prevent speculative execution */

/*
 * This is the start of the interrupt handlers for iSeries
 * This code runs with relocation on.
 */
/*
 * EXCEPTION_PROLOG_ISERIES_1(area)
 *
 * First half of the iSeries prolog.  The caller must already have put
 * the interrupted r13 into SPRG1.  Loads the paca address into r13,
 * stores r9-r12 and the interrupted r13 into "area", and leaves the
 * interrupted CR in r9.  r10-r12 still hold their interrupted values.
 */
#define EXCEPTION_PROLOG_ISERIES_1(area)				\
	mfspr	r13,SPRG3;		/* get paca address into r13 */	\
	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
	std	r10,area+EX_R10(r13);					\
	std	r11,area+EX_R11(r13);					\
	std	r12,area+EX_R12(r13);					\
	mfspr	r9,SPRG1;						\
	std	r9,area+EX_R13(r13);					\
	mfcr	r9

/*
 * EXCEPTION_PROLOG_ISERIES_2
 *
 * Second half of the iSeries prolog.  Loads r11/r12 from the SRR0/SRR1
 * fields of the lppaca embedded in the paca (rather than from the SPRs)
 * and sets MSR_RI in the MSR.  r10 is clobbered with the new MSR value.
 */
#define EXCEPTION_PROLOG_ISERIES_2					\
	mfmsr	r10;							\
	ld	r11,PACALPPACA+LPPACASRR0(r13);				\
	ld	r12,PACALPPACA+LPPACASRR1(r13);				\
	ori	r10,r10,MSR_RI;						\
	mtmsrd	r10,1

/*
 * The common exception prolog is used for all except a few exceptions
 * such as a segment miss on a kernel address.  We have to be prepared
 * to take another exception from the point where we first touch the
 * kernel stack onwards.
 *
 * On entry r13 points to the paca, r9-r13 are saved in the paca,
 * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
 * SRR1, and relocation is on.
 *
 * EXCEPTION_PROLOG_COMMON(n, area):
 *   n    - trap number; the value stored in _TRAP is n+1.
 *          NOTE(review): the extra low bit presumably flags "non-volatile
 *          GPRs not saved yet" - confirm against save_nvgprs.
 *   area - paca save area that the first-level prolog filled in.
 *
 * What the macro does:
 *   - Picks the stack: if SRR1 has MSR_PR set (came from user) r1 is
 *     loaded from PACAKSAVE, otherwise a frame of INT_FRAME_SIZE is
 *     carved below the current r1.
 *   - Branches to bad_stack if the resulting r1 is not negative as a
 *     signed 64-bit value (i.e. does not look like a kernel address).
 *   - Saves CR, SRR0, SRR1, r0-r13, LR, CTR and XER into the frame,
 *     writes the back-chain word, clears RESULT and stores
 *     exception_marker at STACK_FRAME_OVERHEAD-16.
 *   - Loads the kernel TOC pointer into r2 from PACATOC.
 *
 * On exit r1 = new frame, r2 = kernel TOC, r12 still holds SRR1, r13
 * still holds the paca, and cr0 still reflects the MSR_PR test ("ne"
 * means the exception came from user mode).  r9-r11 are scratch.
 */
#define EXCEPTION_PROLOG_COMMON(n, area)				   \
	andi.	r10,r12,MSR_PR;		/* See if coming from user	*/ \
	mr	r10,r1;			/* Save r1			*/ \
	subi	r1,r1,INT_FRAME_SIZE;	/* alloc frame on kernel stack	*/ \
	beq-	1f;							   \
	ld	r1,PACAKSAVE(r13);	/* kernel stack to use		*/ \
1:	cmpdi	cr1,r1,0;		/* check if r1 is in userspace	*/ \
	bge-	cr1,bad_stack;		/* abort if it is		*/ \
	std	r9,_CCR(r1);		/* save CR in stackframe	*/ \
	std	r11,_NIP(r1);		/* save SRR0 in stackframe	*/ \
	std	r12,_MSR(r1);		/* save SRR1 in stackframe	*/ \
	std	r10,0(r1);		/* make stack chain pointer	*/ \
	std	r0,GPR0(r1);		/* save r0 in stackframe	*/ \
	std	r10,GPR1(r1);		/* save r1 in stackframe	*/ \
	std	r2,GPR2(r1);		/* save r2 in stackframe	*/ \
	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe	*/ \
	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe	*/ \
	ld	r9,area+EX_R9(r13);	/* move r9, r10 to stackframe	*/ \
	ld	r10,area+EX_R10(r13);					   \
	std	r9,GPR9(r1);						   \
	std	r10,GPR10(r1);						   \
	ld	r9,area+EX_R11(r13);	/* move r11 - r13 to stackframe	*/ \
	ld	r10,area+EX_R12(r13);					   \
	ld	r11,area+EX_R13(r13);					   \
	std	r9,GPR11(r1);						   \
	std	r10,GPR12(r1);						   \
	std	r11,GPR13(r1);						   \
	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
	mflr	r9;			/* save LR in stackframe	*/ \
	std	r9,_LINK(r1);						   \
	mfctr	r10;			/* save CTR in stackframe	*/ \
	std	r10,_CTR(r1);						   \
	mfspr	r11,XER;		/* save XER in stackframe	*/ \
	std	r11,_XER(r1);						   \
	li	r9,(n)+1;						   \
	std	r9,_TRAP(r1);		/* set trap number		*/ \
	li	r10,0;							   \
	ld	r11,exception_marker@toc(r2);				   \
	std	r10,RESULT(r1);		/* clear regs->result		*/ \
	std	r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame	*/

/*
 * Exception vectors.
 */
/*
 * STD_EXCEPTION_PSERIES(n, label)
 *
 * Emits a standard pSeries vector at absolute offset n: defines the
 * global symbol label_pSeries, stashes r13 in SPRG1 and runs the
 * pSeries prolog with the PACA_EXGEN save area, continuing at
 * label_common.
 */
#define STD_EXCEPTION_PSERIES(n, label)			\
	. = n;						\
	.globl label##_pSeries;				\
label##_pSeries:					\
	HMT_MEDIUM;					\
	mtspr	SPRG1,r13;		/* save r13 */	\
	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)

/*
 * STD_EXCEPTION_ISERIES(n, label, area)
 *
 * Emits the iSeries entry stub label_iSeries: stashes r13 in SPRG1,
 * runs both halves of the iSeries prolog using save area "area", then
 * branches to label_common.  The trap number n is not used by the
 * expansion (no ". = n" is emitted); it only documents the vector.
 */
#define STD_EXCEPTION_ISERIES(n, label, area)		\
	.globl label##_iSeries;				\
label##_iSeries:					\
	HMT_MEDIUM;					\
	mtspr	SPRG1,r13;		/* save r13 */	\
	EXCEPTION_PROLOG_ISERIES_1(area);		\
	EXCEPTION_PROLOG_ISERIES_2;			\
	b	label##_common

/*
 * MASKABLE_EXCEPTION_ISERIES(n, label)
 *
 * Like STD_EXCEPTION_ISERIES with the PACA_EXGEN area, but after the
 * first prolog half it tests the PACAPROCENABLED byte in the paca: if
 * it is zero the stub branches to label_iSeries_masked instead of
 * entering label_common.  The trap number n is not used by the
 * expansion.
 */
#define MASKABLE_EXCEPTION_ISERIES(n, label)				\
	.globl label##_iSeries;						\
label##_iSeries:							\
	HMT_MEDIUM;							\
	mtspr	SPRG1,r13;		/* save r13 */			\
	EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN);				\
	lbz	r10,PACAPROCENABLED(r13);				\
	cmpwi	0,r10,0;						\
	beq-	label##_iSeries_masked;					\
	EXCEPTION_PROLOG_ISERIES_2;					\
	b	label##_common;						\

/*
 * DISABLE_INTS / ENABLE_INTS
 *
 * Both expect r1 to point at the exception frame and r13 at the paca,
 * and both clobber r10 and r11 (the hard-enable ENABLE_INTS clobbers
 * r11 and r12 instead).
 *
 * With DO_SOFT_DISABLE:
 *   DISABLE_INTS records the current PACAPROCENABLED value in the
 *   frame's SOFTE slot, clears PACAPROCENABLED (soft-disable) and then
 *   sets MSR_EE (hard-enable).
 *   ENABLE_INTS records PACAPROCENABLED in SOFTE without changing it
 *   and sets MSR_EE.
 *
 * Without DO_SOFT_DISABLE:
 *   DISABLE_INTS expands to nothing.
 *   ENABLE_INTS copies the MSR_EE bit of the saved MSR (frame _MSR slot)
 *   into the live MSR, i.e. it restores the interrupted context's
 *   interrupt-enable state.
 */
#ifdef DO_SOFT_DISABLE
#define DISABLE_INTS				\
	lbz	r10,PACAPROCENABLED(r13);	\
	li	r11,0;				\
	std	r10,SOFTE(r1);			\
	mfmsr	r10;				\
	stb	r11,PACAPROCENABLED(r13);	\
	ori	r10,r10,MSR_EE;			\
	mtmsrd	r10,1

#define ENABLE_INTS				\
	lbz	r10,PACAPROCENABLED(r13);	\
	mfmsr	r11;				\
	std	r10,SOFTE(r1);			\
	ori	r11,r11,MSR_EE;			\
	mtmsrd	r11,1

#else	/* hard enable/disable interrupts */
#define DISABLE_INTS

#define ENABLE_INTS				\
	ld	r12,_MSR(r1);			\
	mfmsr	r11;				\
	rlwimi	r11,r12,0,MSR_EE;		\
	mtmsrd	r11,1

#endif

/*
 * STD_EXCEPTION_COMMON(trap, label, hdlr)
 *
 * Emits the 128-byte aligned second-level handler label_common: builds
 * the exception frame from PACA_EXGEN, applies DISABLE_INTS, calls
 * .save_nvgprs, then calls hdlr with r3 pointing STACK_FRAME_OVERHEAD
 * bytes into the frame, and finally leaves through .ret_from_except.
 */
#define STD_EXCEPTION_COMMON(trap, label, hdlr)		\
	.align	7;					\
	.globl label##_common;				\
label##_common:						\
	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);	\
	DISABLE_INTS;					\
	bl	.save_nvgprs;				\
	addi	r3,r1,STACK_FRAME_OVERHEAD;		\
	bl	hdlr;					\
	b	.ret_from_except

/*
 * STD_EXCEPTION_COMMON_LITE(trap, label, hdlr)
 *
 * Same as STD_EXCEPTION_COMMON except that .save_nvgprs is not called
 * and the exit path is .ret_from_except_lite.
 */
#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr)	\
	.align	7;					\
	.globl label##_common;				\
label##_common:						\
	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);	\
	DISABLE_INTS;					\
	addi	r3,r1,STACK_FRAME_OVERHEAD;		\
	bl	hdlr;					\
	b	.ret_from_except_lite

/*
 * Start of pSeries system interrupt routines
 */
	/*
	 * __start_interrupts marks the first pSeries vector (0x100).
	 * System reset uses the standard stub; machine check at 0x200 is
	 * open-coded because it uses its own save area (PACA_EXMC).
	 */
	. = 0x100
	.globl __start_interrupts
__start_interrupts:

	STD_EXCEPTION_PSERIES(0x100, system_reset)

	. = 0x200
_machine_check_pSeries:
	HMT_MEDIUM
	mtspr	SPRG1,r13		/* save r13 */
	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)

	/*
	 * 0x300: data access exception.
	 * On cpus without CPU_FTR_SLB the feature section first checks
	 * for a segment table fault on an address whose top nibble is
	 * 0xc and, if so, diverts to the bolted-stab path.  SPRG2 is used
	 * as extra scratch for r12 while doing so.
	 */
	. = 0x300
	.globl data_access_pSeries
data_access_pSeries:
	HMT_MEDIUM
	mtspr	SPRG1,r13
BEGIN_FTR_SECTION
	mtspr	SPRG2,r12
	mfspr	r13,DAR
	mfspr	r12,DSISR
	srdi	r13,r13,60		/* r13 = top nibble of DAR */
	rlwimi	r13,r12,16,0x20		/* DSISR bit 0x00200000 -> 0x20 */
	mfcr	r12			/* keep CR; cmpwi clobbers cr0 */
	cmpwi	r13,0x2c		/* 0x20 (segment fault) | 0xc */
	beq	.do_stab_bolted_pSeries
	mtcrf	0x80,r12		/* not ours: restore cr0 and r12 */
	mfspr	r12,SPRG2
END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)

	/*
	 * 0x380: data SLB miss.  Open-coded prolog using the PACA_EXSLB
	 * area; additionally saves r3 and passes the faulting address
	 * (DAR) in r3.  Branches to .do_slb_miss without turning
	 * relocation on, with r9 = CR and r12 = SRR1.  SRR0 is neither
	 * read nor modified here.
	 */
	. = 0x380
	.globl data_access_slb_pSeries
data_access_slb_pSeries:
	HMT_MEDIUM
	mtspr	SPRG1,r13
	mfspr	r13,SPRG3		/* get paca address into r13 */
	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
	std	r10,PACA_EXSLB+EX_R10(r13)
	std	r11,PACA_EXSLB+EX_R11(r13)
	std	r12,PACA_EXSLB+EX_R12(r13)
	std	r3,PACA_EXSLB+EX_R3(r13)
	mfspr	r9,SPRG1
	std	r9,PACA_EXSLB+EX_R13(r13)
	mfcr	r9
	mfspr	r12,SRR1		/* and SRR1 */
	mfspr	r3,DAR
	b	.do_slb_miss		/* Rel. branch works in real mode */

	/* 0x400: instruction access exception, standard stub. */
	STD_EXCEPTION_PSERIES(0x400, instruction_access)

	/*
	 * 0x480: instruction SLB miss.  Same open-coded prolog as the
	 * data SLB miss at 0x380, except that the faulting address handed
	 * to .do_slb_miss in r3 is SRR0.
	 */
	. = 0x480
	.globl instruction_access_slb_pSeries
instruction_access_slb_pSeries:
	HMT_MEDIUM
	mtspr	SPRG1,r13
	mfspr	r13,SPRG3		/* get paca address into r13 */
	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
	std	r10,PACA_EXSLB+EX_R10(r13)
	std	r11,PACA_EXSLB+EX_R11(r13)
	std	r12,PACA_EXSLB+EX_R12(r13)
	std	r3,PACA_EXSLB+EX_R3(r13)
	mfspr	r9,SPRG1
	std	r9,PACA_EXSLB+EX_R13(r13)
	mfcr	r9
	mfspr	r12,SRR1		/* and SRR1 */
	mfspr	r3,SRR0			/* SRR0 is faulting address */
	b	.do_slb_miss		/* Rel. branch works in real mode */

	/*
	 * Vectors 0x500 - 0xb00: all use the standard stub, which places
	 * each one at its absolute offset and continues at <name>_common.
	 */
	STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
	STD_EXCEPTION_PSERIES(0x600, alignment)
	STD_EXCEPTION_PSERIES(0x700, program_check)
	STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
	STD_EXCEPTION_PSERIES(0x900, decrementer)
	STD_EXCEPTION_PSERIES(0xa00, trap_0a)
	STD_EXCEPTION_PSERIES(0xb00, trap_0b)

	/*
	 * 0xc00: system call.  No registers are stored in the paca here.
	 * On arrival at system_call_common (relocation on):
	 *   r9  = interrupted r13
	 *   r10 = MSR value that was loaded into SRR1 for the rfid
	 *   r11 = interrupted SRR0
	 *   r12 = interrupted SRR1
	 *   r13 = paca
	 * Unlike the generic prolog, the handler address is built with
	 * both oris and ori, so all of its low 32 bits are used.
	 */
	. = 0xc00
	.globl	system_call_pSeries
system_call_pSeries:
	HMT_MEDIUM
	mr	r9,r13
	mfmsr	r10
	mfspr	r13,SPRG3
	mfspr	r11,SRR0
	clrrdi	r12,r13,32
	oris	r12,r12,system_call_common@h
	ori	r12,r12,system_call_common@l
	mtspr	SRR0,r12
	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI
	mfspr	r12,SRR1
	mtspr	SRR1,r10
	rfid
	b	.	/* prevent speculative execution */

	/* Vectors 0xd00 and 0xe00: standard stubs. */
	STD_EXCEPTION_PSERIES(0xd00, single_step)
	STD_EXCEPTION_PSERIES(0xe00, trap_0e)

	/* We need to deal with the Altivec unavailable exception
	 * here which is at 0xf20, thus in the middle of the
	 * prolog code of the PerformanceMonitor one. A little
	 * trickery is thus necessary
	 *
	 * The 0xf00 vector is therefore only a branch to the real
	 * performance monitor stub, which is assembled at 0x3000.
	 */
	. = 0xf00
	b	performance_monitor_pSeries

	STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable)

	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)

	/* moved from 0xf00 */
	STD_EXCEPTION_PSERIES(0x3000, performance_monitor)

	/*
	 * Reached from the 0x300 vector with r13 already saved in SPRG1,
	 * the interrupted r12 in SPRG2 and the interrupted CR in r12.
	 * Restores cr0 and r12, then runs the normal pSeries prolog with
	 * the PACA_EXSLB area and continues at .do_stab_bolted.
	 */
	. = 0x3100
_GLOBAL(do_stab_bolted_pSeries)
	mtcrf	0x80,r12
	mfspr	r12,SPRG2
	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)

	
	/* Space for the naca.  Architected to be located at real address
	 * NACA_PHYS_ADDR.  Various tools rely on this location being fixed.
	 * The first dword of the naca is required by iSeries LPAR to
	 * point to itVpdAreas.  On pSeries native, this value is not used.
	 */
	/*
	 * __end_interrupts marks the end of the low interrupt area and
	 * sits at NACA_PHYS_ADDR.  Only iSeries emits data here (the
	 * first naca dword, pointing at itVpdAreas).
	 */
	. = NACA_PHYS_ADDR
	.globl __end_interrupts
__end_interrupts:
#ifdef CONFIG_PPC_ISERIES
	.globl naca
naca:
	.llong itVpdAreas
#endif

	/*
	 * Reserve one page for systemcfg: __start_systemcfg and
	 * __end_systemcfg bracket PAGE_SIZE bytes at SYSTEMCFG_PHYS_ADDR.
	 */
	. = SYSTEMCFG_PHYS_ADDR
	.globl __start_systemcfg
__start_systemcfg:
	. = (SYSTEMCFG_PHYS_ADDR + PAGE_SIZE)
	.globl __end_systemcfg
__end_systemcfg:

#ifdef CONFIG_PPC_ISERIES
	/*
	 * The iSeries LPAR map is at this fixed address
	 * so that the HvReleaseData structure can address
	 * it with a 32-bit offset.
	 *
	 * The VSID values below are dependent on the
	 * VSID generation algorithm.  See include/asm/mmu_context.h.
	 */

	/*
	 * iSeries LPAR map contents: a header (ESID count, range count,
	 * segment table page, five reserved dwords), then one ESID/VSID
	 * pair per mapped segment, then one memory range descriptor.
	 */
	.llong	2		/* # ESIDs to be mapped by hypervisor	 */
	.llong	1		/* # memory ranges to be mapped by hypervisor */
	.llong	STAB0_PAGE	/* Page # of segment table within load area	*/
	.llong	0		/* Reserved */
	.llong	0		/* Reserved */
	.llong	0		/* Reserved */
	.llong	0		/* Reserved */
	.llong	0		/* Reserved */
	.llong	(KERNELBASE>>SID_SHIFT)
	.llong	0x408f92c94	/* KERNELBASE VSID */
	/* We have to list the bolted VMALLOC segment here, too, so that it
	 * will be restored on shared processor switch */
	.llong	(VMALLOCBASE>>SID_SHIFT)
	.llong	0xf09b89af5	/* VMALLOCBASE VSID */
	.llong	8192		/* # pages to map (32 MB) */
	.llong	0		/* Offset from start of loadarea to start of map */
	.llong	0x408f92c940000	/* VPN of first page to map */

	/*
	 * The iSeries entry stubs start at 0x6100.  They are laid out
	 * back to back rather than at architected vector offsets.
	 */
	. = 0x6100

/***  ISeries-LPAR interrupt handlers ***/

	/* Machine check uses its own save area, PACA_EXMC. */
	STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC)

	/*
	 * iSeries data access exception.  On cpus without CPU_FTR_SLB the
	 * feature section diverts a segment table fault on an address
	 * whose top nibble is 0xc to .do_stab_bolted_iSeries; SPRG2 holds
	 * the interrupted r12 while r12/r13 are used as scratch.
	 */
	.globl data_access_iSeries
data_access_iSeries:
	mtspr	SPRG1,r13
BEGIN_FTR_SECTION
	mtspr	SPRG2,r12
	mfspr	r13,DAR
	mfspr	r12,DSISR
	srdi	r13,r13,60		/* r13 = top nibble of DAR */
	rlwimi	r13,r12,16,0x20		/* DSISR bit 0x00200000 -> 0x20 */
	mfcr	r12			/* keep CR; cmpwi clobbers cr0 */
	cmpwi	r13,0x2c		/* 0x20 (segment fault) | 0xc */
	beq	.do_stab_bolted_iSeries
	mtcrf	0x80,r12		/* not ours: restore cr0 and r12 */
	mfspr	r12,SPRG2
END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
	EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
	EXCEPTION_PROLOG_ISERIES_2
	b	data_access_common

	/*
	 * Bolted segment table miss: restore cr0 and r12, then run the
	 * iSeries prolog with the PACA_EXSLB area.
	 */
.do_stab_bolted_iSeries:
	mtcrf	0x80,r12
	mfspr	r12,SPRG2
	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
	EXCEPTION_PROLOG_ISERIES_2
	b	.do_stab_bolted

	/*
	 * iSeries data SLB miss.  Saves r9-r13 and r3 in PACA_EXSLB and
	 * enters .do_slb_miss with r3 = DAR, r9 = CR and r12 = the SRR1
	 * value recorded in the lppaca.  Only the first prolog half is
	 * run, so the MSR is not modified and r11 is not loaded.
	 */
	.globl	data_access_slb_iSeries
data_access_slb_iSeries:
	mtspr	SPRG1,r13		/* save r13 */
	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
	std	r3,PACA_EXSLB+EX_R3(r13)
	ld	r12,PACALPPACA+LPPACASRR1(r13)
	mfspr	r3,DAR
	b	.do_slb_miss

	/* iSeries instruction access exception: standard stub. */
	STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)

	/*
	 * iSeries instruction SLB miss.  Same as the data SLB miss stub,
	 * except that the faulting address passed in r3 is the SRR0
	 * value recorded in the lppaca.
	 */
	.globl	instruction_access_slb_iSeries
instruction_access_slb_iSeries:
	mtspr	SPRG1,r13		/* save r13 */
	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
	std	r3,PACA_EXSLB+EX_R3(r13)
	ld	r12,PACALPPACA+LPPACASRR1(r13)
	ld	r3,PACALPPACA+LPPACASRR0(r13)
	b	.do_slb_miss

	/*
	 * iSeries stubs for 0x500 - 0xb00.  The external interrupt and
	 * the decrementer use the maskable variant, which honours the
	 * PACAPROCENABLED soft-disable flag; the rest are standard.
	 */
	MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
	STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
	STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN)
	STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN)
	MASKABLE_EXCEPTION_ISERIES(0x900, decrementer)
	STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN)
	STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN)

	/*
	 * iSeries system call entry.  Produces the same register layout
	 * for system_call_common as the pSeries 0xc00 vector: r9 =
	 * interrupted r13, r11/r12 = SRR0/SRR1 (read from the lppaca),
	 * r13 = paca; r10 holds the MSR value with RI set.
	 */
	.globl	system_call_iSeries
system_call_iSeries:
	mr	r9,r13
	mfspr	r13,SPRG3
	EXCEPTION_PROLOG_ISERIES_2
	b	system_call_common

	/* iSeries stubs for 0xd00 - 0xf00: all standard, PACA_EXGEN area. */
	STD_EXCEPTION_ISERIES( 0xd00, single_step, PACA_EXGEN)
	STD_EXCEPTION_ISERIES( 0xe00, trap_0e, PACA_EXGEN)
	STD_EXCEPTION_ISERIES( 0xf00, performance_monitor, PACA_EXGEN)

	/*
	 * iSeries entry point / system reset.
	 *
	 * The processor whose paca index is 0 goes on to
	 * .__start_initialization_iSeries.  Every other processor clears
	 * the RUNLATCH bit and then loops at "1:":
	 *  - CONFIG_SMP: sets up r1 from current_set[cpu] and, once the
	 *    PACAPROCSTART byte is non-zero, jumps to .__secondary_start;
	 *    until then it makes a hypervisor call on each iteration.
	 *  - !CONFIG_SMP: yields to the hypervisor forever.
	 *
	 * r24 holds the processor number throughout the loop.
	 */
	.globl system_reset_iSeries
system_reset_iSeries:
	mfspr	r13,SPRG3		/* Get paca address */
	mfmsr	r24
	ori	r24,r24,MSR_RI
	mtmsrd	r24			/* RI on */
	lhz	r24,PACAPACAINDEX(r13)	/* Get processor # */
	cmpwi	0,r24,0			/* Are we processor 0? */
	beq	.__start_initialization_iSeries	/* Start up the first processor */
	mfspr	r4,CTRLF
	li	r5,RUNLATCH		/* Turn off the run light */
	andc	r4,r4,r5
	mtspr	CTRLT,r4

1:
	HMT_LOW
#ifdef CONFIG_SMP
	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor
					 * should start */
	sync
	LOADADDR(r3,current_set)
	sldi	r28,r24,3		/* get current_set[cpu#] */
	ldx	r3,r3,r28
	addi	r1,r3,THREAD_SIZE	/* r1 = top of that stack area ... */
	subi	r1,r1,STACK_FRAME_OVERHEAD	/* ... minus one frame header */

	cmpwi	0,r23,0
	beq	iSeries_secondary_smp_loop	/* Loop until told to go */
#ifdef SECONDARY_PROCESSORS
	bne	.__secondary_start		/* Told to go: start this cpu */
#endif
iSeries_secondary_smp_loop:
	/* Let the Hypervisor know we are alive */
	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
	lis	r3,0x8002
	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
#else /* CONFIG_SMP */
	/* Yield the processor.  This is required for non-SMP kernels
		which are running on multi-threaded machines. */
	lis	r3,0x8000
	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
	addi	r3,r3,18		/* r3 = 0x8000000000000012 which is "yield" */
	li	r4,0			/* "yield timed" */
	li	r5,-1			/* "yield forever" */
#endif /* CONFIG_SMP */
	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
	sc				/* Invoke the hypervisor via a system call */
	mfspr	r13,SPRG3		/* Put r13 back ???? */
	b	1b			/* If SMP not configured, secondaries
					 * loop forever */

	/*
	 * Targets of MASKABLE_EXCEPTION_ISERIES when PACAPROCENABLED is 0.
	 * On entry: r13 = paca, r9 = interrupted CR, r9-r13 saved in
	 * PACA_EXGEN.
	 *
	 * A masked decrementer sets the LPPACADECRINT byte to 1 and
	 * reloads DEC from PACADEFAULTDECR, then shares the return path
	 * below.
	 */
	.globl decrementer_iSeries_masked
decrementer_iSeries_masked:
	li	r11,1
	stb	r11,PACALPPACA+LPPACADECRINT(r13)
	lwz	r12,PACADEFAULTDECR(r13)
	mtspr	SPRN_DEC,r12
	/* fall through */

	/*
	 * Return to the interrupted context without handling anything:
	 * restore cr0, reload SRR0/SRR1 from the lppaca, restore r9-r13
	 * from PACA_EXGEN and rfid.
	 */
	.globl hardware_interrupt_iSeries_masked
hardware_interrupt_iSeries_masked:
	mtcrf	0x80,r9		/* Restore regs */
	ld	r11,PACALPPACA+LPPACASRR0(r13)
	ld	r12,PACALPPACA+LPPACASRR1(r13)
	mtspr	SRR0,r11
	mtspr	SRR1,r12
	ld	r9,PACA_EXGEN+EX_R9(r13)
	ld	r10,PACA_EXGEN+EX_R10(r13)
	ld	r11,PACA_EXGEN+EX_R11(r13)
	ld	r12,PACA_EXGEN+EX_R12(r13)
	ld	r13,PACA_EXGEN+EX_R13(r13)	/* r13 last: it is the base */
	rfid
	b	.	/* prevent speculative execution */
#endif

/*
 * Data area reserved for FWNMI option.
 */
	/*
	 * fwnmi_data_area labels offset 0x7000; no data is emitted here,
	 * the space up to the FWNMI vectors at 0x8000 is left reserved.
	 */
	.= 0x7000
	.globl fwnmi_data_area
fwnmi_data_area:

/*
 * Vectors for the FWNMI option.  Share common code.
 */
	/*
	 * FWNMI entry stubs.  They mirror the 0x100 and 0x200 pSeries
	 * vectors (same save areas, same second-level handlers) but are
	 * placed back to back starting at 0x8000.
	 */
	. = 0x8000
	.globl system_reset_fwnmi
system_reset_fwnmi:
	HMT_MEDIUM
	mtspr	SPRG1,r13		/* save r13 */
	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
	.globl machine_check_fwnmi
machine_check_fwnmi:
	HMT_MEDIUM
	mtspr	SPRG1,r13		/* save r13 */
	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)

	/*
	 * Space for the initial segment table
	 * For LPAR, the hypervisor must fill in at least one entry
	 * before we get control (with relocate on)
	 */
	/*
	 * __start_stab / __end_stab bracket one PAGE_SIZE page reserved
	 * at STAB0_PHYS_ADDR; no contents are assembled here.
	 */
	. = STAB0_PHYS_ADDR
	.globl __start_stab
__start_stab:

	. = (STAB0_PHYS_ADDR + PAGE_SIZE)
	.globl __end_stab
__end_stab:


/*** Common interrupt handlers ***/

	/* Second-level handler for system reset (from 0x100 and FWNMI). */
	STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)

	/*
	 * Machine check is different because we use a different
	 * save area: PACA_EXMC instead of PACA_EXGEN.
	 */
	/*
	 * Open-coded equivalent of STD_EXCEPTION_COMMON, but building the
	 * frame from PACA_EXMC.
	 */
	.align	7
	.globl machine_check_common
machine_check_common:
	EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
	DISABLE_INTS
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.machine_check_exception
	b	.ret_from_except

	/*
	 * Second-level handlers that need nothing beyond the standard
	 * sequence.  The decrementer uses the LITE form (no save_nvgprs).
	 * Without CONFIG_ALTIVEC the altivec assist trap is routed to
	 * .unknown_exception.
	 */
	STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
	STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
	STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception)
	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
#ifdef CONFIG_ALTIVEC
	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
#else
	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
#endif

/*
 * Here we have detected that the kernel stack pointer is bad.
 * R9 contains the saved CR, r13 points to the paca,
 * r10 contains the (bad) kernel stack pointer,
 * r11 and r12 contain the saved SRR0 and SRR1.
 * We switch to using the paca guard page as an emergency stack,
 * save the registers there, and call kernel_bad_stack(), which panics.
 */
/*
 * Reached from EXCEPTION_PROLOG_COMMON when the candidate r1 fails the
 * kernel-address check.  At that point r10 holds the r1 value from
 * before the prolog adjusted it.  Builds a frame on the emergency stack
 * (PACAEMERGSP) and calls .kernel_bad_stack in a loop; it never returns
 * to the interrupted context.  r9-r11 are not stored as GPRs in this
 * frame (they have already been reused as scratch).
 */
bad_stack:
	ld	r1,PACAEMERGSP(r13)
	subi	r1,r1,64+INT_FRAME_SIZE
	std	r9,_CCR(r1)
	std	r10,GPR1(r1)
	std	r11,_NIP(r1)
	std	r12,_MSR(r1)
	mfspr	r11,DAR
	mfspr	r12,DSISR
	std	r11,_DAR(r1)
	std	r12,_DSISR(r1)
	mflr	r10
	mfctr	r11
	mfxer	r12
	std	r10,_LINK(r1)
	std	r11,_CTR(r1)
	std	r12,_XER(r1)
	SAVE_GPR(0,r1)
	SAVE_GPR(2,r1)
	SAVE_4GPRS(3,r1)
	SAVE_2GPRS(7,r1)
	SAVE_10GPRS(12,r1)
	SAVE_10GPRS(22,r1)
	addi	r11,r1,INT_FRAME_SIZE
	std	r11,0(r1)		/* back chain -> the 64 bytes above */
	li	r12,0
	std	r12,0(r11)		/* and terminate the chain there */
	ld	r2,PACATOC(r13)
1:	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.kernel_bad_stack
	b	1b

/*
 * Return from an exception with minimal checks.
 * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
 * If interrupts have been enabled, or anything has been
 * done that might have changed the scheduling status of
 * any task or sent any task a signal, you should use
 * ret_from_except or ret_from_except_lite instead of this.
 */
/*
 * Restores CR, LR, CTR, XER, r0-r13 and r1 from the frame at r1,
 * reloads SRR0/SRR1 from the saved NIP/MSR and rfid's.  If the saved
 * MSR does not have RI set the exception is treated as unrecoverable.
 * MSR_RI is cleared in the live MSR before SRR0/SRR1 are written.
 */
fast_exception_return:
	ld	r12,_MSR(r1)
	ld	r11,_NIP(r1)
	andi.	r3,r12,MSR_RI		/* check if RI is set */
	beq-	unrecov_fer
	ld	r3,_CCR(r1)
	ld	r4,_LINK(r1)
	ld	r5,_CTR(r1)
	ld	r6,_XER(r1)
	mtcr	r3
	mtlr	r4
	mtctr	r5
	mtxer	r6
	REST_GPR(0, r1)
	REST_8GPRS(2, r1)

	mfmsr	r10
	clrrdi	r10,r10,2		/* clear RI (LE is 0 already) */
	mtmsrd	r10,1

	mtspr	SRR1,r12
	mtspr	SRR0,r11
	REST_4GPRS(10, r1)
	ld	r1,GPR1(r1)		/* r1 last: it is the frame base */
	rfid
	b	.	/* prevent speculative execution */

/* Saved MSR had RI clear: report it and never return. */
unrecov_fer:
	bl	.save_nvgprs
1:	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.unrecoverable_exception
	b	1b

/*
 * Here r13 points to the paca, r9 contains the saved CR,
 * SRR0 and SRR1 are saved in r11 and r12,
 * r9 - r13 are saved in paca->exgen.
 */
	/*
	 * DAR and DSISR are parked in PACA_EXGEN before the frame is
	 * built and reloaded from there afterwards.  Enters .do_hash_page
	 * with r3 = DAR, r4 = DSISR, r5 = 0x300; r12 still holds SRR1.
	 */
	.align	7
	.globl data_access_common
data_access_common:
	mfspr	r10,DAR
	std	r10,PACA_EXGEN+EX_DAR(r13)
	mfspr	r10,DSISR
	stw	r10,PACA_EXGEN+EX_DSISR(r13)
	EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
	ld	r3,PACA_EXGEN+EX_DAR(r13)
	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
	li	r5,0x300
	b	.do_hash_page	 	/* Try to handle as hpte fault */

	/*
	 * For an instruction fault the faulting address is the saved NIP
	 * and the status word passed in r4 is taken from SRR1 (masked
	 * with 0x58200000).  Enters .do_hash_page with r5 = 0x400.
	 */
	.align	7
	.globl instruction_access_common
instruction_access_common:
	EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
	ld	r3,_NIP(r1)
	andis.	r4,r12,0x5820
	li	r5,0x400
	b	.do_hash_page		/* Try to handle as hpte fault */

	/*
	 * External interrupt.  hardware_interrupt_entry is exported as a
	 * second entry point that skips the frame construction, so it
	 * expects a complete exception frame at r1 already.
	 */
	.align	7
	.globl hardware_interrupt_common
	.globl hardware_interrupt_entry
hardware_interrupt_common:
	EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
hardware_interrupt_entry:
	DISABLE_INTS
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.do_IRQ
	b	.ret_from_except_lite

	/*
	 * Alignment exception.  DAR/DSISR are parked in PACA_EXGEN across
	 * the prolog and then copied into the frame before
	 * .alignment_exception is called with r3 = regs.
	 */
	.align	7
	.globl alignment_common
alignment_common:
	mfspr	r10,DAR
	std	r10,PACA_EXGEN+EX_DAR(r13)
	mfspr	r10,DSISR
	stw	r10,PACA_EXGEN+EX_DSISR(r13)
	EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
	ld	r3,PACA_EXGEN+EX_DAR(r13)
	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
	std	r3,_DAR(r1)
	std	r4,_DSISR(r1)
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ENABLE_INTS
	bl	.alignment_exception
	b	.ret_from_except

	/*
	 * Program check: full frame plus non-volatile GPRs, ENABLE_INTS
	 * applied, then .program_check_exception(regs).
	 */
	.align	7
	.globl program_check_common
program_check_common:
	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ENABLE_INTS
	bl	.program_check_exception
	b	.ret_from_except

	/*
	 * FP unavailable.  The bne tests cr0 as left by the MSR_PR check
	 * inside EXCEPTION_PROLOG_COMMON: from user mode the FPU state is
	 * simply loaded.  From kernel mode the C handler is called and is
	 * not expected to return (BUG_OPCODE follows the call).
	 */
	.align	7
	.globl fp_unavailable_common
fp_unavailable_common:
	EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
	bne	.load_up_fpu		/* if from user, just load it up */
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ENABLE_INTS
	bl	.kernel_fp_unavailable_exception
	BUG_OPCODE

	/*
	 * Altivec unavailable.  With CONFIG_ALTIVEC a fault from user
	 * mode (cr0 "ne" from the prolog's MSR_PR test) loads the vector
	 * state; every other case goes to the C handler.
	 */
	.align	7
	.globl altivec_unavailable_common
altivec_unavailable_common:
	EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
#ifdef CONFIG_ALTIVEC
	bne	.load_up_altivec	/* if from user, just load it up */
#endif
	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ENABLE_INTS
	bl	.altivec_unavailable_exception
	b	.ret_from_except

/*
 * Hash table stuff
 */
	/*
	 * do_hash_page: common tail of the data/instruction access
	 * handlers.
	 *
	 * On entry: r1 = exception frame, r3 = faulting address,
	 * r4 = fault status word (DSISR, or the SRR1-derived equivalent),
	 * r5 = trap number (0x300 or 0x400), r12 = saved SRR1,
	 * r13 = paca.
	 *
	 * Stores the address and status into the frame, filters out
	 * faults that cannot be fixed by a hash insertion, and otherwise
	 * calls .hash_page.  Result handling: 0 -> return from the
	 * exception, negative -> label 12 (low_hash_fault), positive ->
	 * the generic page fault path.
	 */
	.align	7
_GLOBAL(do_hash_page)
	std	r3,_DAR(r1)
	std	r4,_DSISR(r1)

	andis.	r0,r4,0xa450		/* weird error? */
	bne-	.handle_page_fault	/* if so, skip the HPTE insert attempt */
BEGIN_FTR_SECTION
	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
	bne-	.do_ste_alloc		/* If so handle it */
END_FTR_SECTION_IFCLR(CPU_FTR_SLB)

	/*
	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
	 * accessing a userspace segment (even from the kernel). We assume
	 * kernel addresses always have the high bit set.
	 */
	rlwinm	r4,r4,32-23,29,29	/* DSISR_STORE -> _PAGE_RW */
	rotldi	r0,r3,15		/* Move high bit into MSR_PR posn */
	orc	r0,r12,r0		/* MSR_PR | ~high_bit */
	rlwimi	r4,r0,32-13,30,30	/* becomes _PAGE_USER access bit */
	ori	r4,r4,1			/* add _PAGE_PRESENT */

	/*
	 * On iSeries, we soft-disable interrupts here, then
	 * hard-enable interrupts so that the hash_page code can spin on
	 * the hash_table_lock without problems on a shared processor.
	 */
	DISABLE_INTS

	/*
	 * r3 contains the faulting address
	 * r4 contains the required access permissions
	 * r5 contains the trap number
	 *
	 * at return r3 = 0 for success
	 */
	bl	.hash_page		/* build HPTE if possible */
	cmpdi	r3,0			/* see if hash_page succeeded */

#ifdef DO_SOFT_DISABLE
	/*
	 * If we had interrupts soft-enabled at the point where the
	 * DSI/ISI occurred, and an interrupt came in during hash_page,
	 * handle it now.
	 * We jump to ret_from_except_lite rather than fast_exception_return
	 * because ret_from_except_lite will check for and handle pending
	 * interrupts if necessary.
	 */
	beq	.ret_from_except_lite
	/* For a hash failure, we don't bother re-enabling interrupts */
	ble-	12f

	/*
	 * hash_page couldn't handle it, set soft interrupt enable back
	 * to what it was before the trap.  Note that .local_irq_restore
	 * handles any interrupts pending at this point.
	 */
	ld	r3,SOFTE(r1)
	bl	.local_irq_restore
	b	11f			/* join handle_page_fault after its ENABLE_INTS */
#else
	beq	fast_exception_return   /* Return from exception on success */
	ble-	12f			/* Failure return from hash_page */

	/* fall through */
#endif

/*
 * Here we have a page fault that hash_page can't handle.
 *
 * On entry r1 points at the exception frame, whose _DAR and _DSISR
 * slots have been filled in with 64-bit stores by do_hash_page.
 * Label 11 is a second entry used by do_hash_page when interrupts
 * have already been dealt with.
 *
 * .do_page_fault is called as (regs, address, status).  A zero result
 * means the fault was handled; otherwise the result is forwarded as
 * the third argument of .bad_page_fault(regs, address, result).
 *
 * The address argument is always loaded with ld: _DAR is a doubleword
 * slot, and a word load from it would pick up only the high-order
 * half of the address on this big-endian layout.
 */
_GLOBAL(handle_page_fault)
	ENABLE_INTS
11:	ld	r4,_DAR(r1)
	ld	r5,_DSISR(r1)
	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.do_page_fault
	cmpdi	r3,0
	beq+	.ret_from_except_lite
	bl	.save_nvgprs
	mr	r5,r3			/* pass do_page_fault's result on */
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ld	r4,_DAR(r1)		/* full 64-bit faulting address */
	bl	.bad_page_fault
	b	.ret_from_except

/* We have a page fault that hash_page could handle but HV refused
 * the PTE insertion
 */
12:	bl	.save_nvgprs
	addi	r3,r1,STACK_FRAME_OVERHEAD
	ld	r4,_DAR(r1)		/* full 64-bit faulting address */
	bl	.low_hash_fault
	b	.ret_from_except

	/* here we have a segment miss */
/*
 * Reached from do_hash_page with r3 still holding the faulting address.
 * A zero result from .ste_allocate means the segment table entry was
 * inserted and the faulting access can simply be retried; anything
 * else is handed to the generic page fault path.
 */
_GLOBAL(do_ste_alloc)
	bl	.ste_allocate		/* try to insert stab entry */
	cmpdi	r3,0
	beq+	fast_exception_return
	b	.handle_page_fault

/*
 * r13 points to the PACA, r9 contains the saved CR,
 * r11 and r12 contain the saved SRR0 and SRR1.
 * r9 - r13 are saved in paca->exslb.
 * We assume we aren't going to take any exceptions during this procedure.
 * We assume (DAR >> 60) == 0xc.
 */
	/*
	 * do_stab_bolted: install a segment table entry for the kernel
	 * address in DAR without building a stack frame.
	 *
	 * Scratch registers are r9-r11; CR and SRR0 are parked in the
	 * PACA_EXSLB area while they are in use.  r12 (SRR1) and r13
	 * (paca) are preserved throughout.
	 *
	 * A group is 128 bytes (8 entries of 16 bytes).  If the primary
	 * group has no entry with the valid bit (0x80) clear, one of
	 * entries 1-7 is chosen using the timebase and cast out.
	 */
	.align	7
_GLOBAL(do_stab_bolted)
	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
	std	r11,PACA_EXSLB+EX_SRR0(r13)	/* save SRR0 in exc. frame */

	/* Hash to the primary group */
	ld	r10,PACASTABVIRT(r13)
	mfspr	r11,DAR
	srdi	r11,r11,28
	rldimi	r10,r11,7,52	/* r10 = first ste of the group */

	/* Calculate VSID */
	/* This is a kernel address, so protovsid = ESID */
	ASM_VSID_SCRAMBLE(r11, r9)
	rldic	r9,r11,12,16	/* r9 = vsid << 12 */

	/* Search the primary group for a free entry */
1:	ld	r11,0(r10)	/* Test valid bit of the current ste	*/
	andi.	r11,r11,0x80
	beq	2f
	addi	r10,r10,16
	andi.	r11,r10,0x70	/* wrapped past the 8th entry yet?	*/
	bne	1b

	/* Stick for only searching the primary group for now.		*/
	/* At least for now, we use a very simple random castout scheme */
	/* Use the TB as a random number ;  OR in 1 to avoid entry 0	*/
	mftb	r11
	rldic	r11,r11,4,57	/* r11 = (r11 << 4) & 0x70 */
	ori	r11,r11,0x10

	/* r10 currently points to an ste one past the group of interest */
	/* make it point to the randomly selected entry			*/
	subi	r10,r10,128
	or 	r10,r10,r11	/* r10 is the entry to invalidate	*/

	isync			/* mark the entry invalid		*/
	ld	r11,0(r10)
	rldicl	r11,r11,56,1	/* clear the valid bit */
	rotldi	r11,r11,8
	std	r11,0(r10)
	sync

	clrrdi	r11,r11,28	/* Get the esid part of the ste		*/
	slbie	r11

2:	std	r9,8(r10)	/* Store the vsid part of the ste	*/
	eieio			/* order it before the valid dword below */

	mfspr	r11,DAR		/* Get the new esid			*/
	clrrdi	r11,r11,28	/* Permits a full 32b of ESID		*/
	ori	r11,r11,0x90	/* Turn on valid and kp			*/
	std	r11,0(r10)	/* Put new entry back into the stab	*/

	sync

	/* All done -- return from exception. */
	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
	ld	r11,PACA_EXSLB+EX_SRR0(r13)	/* get saved SRR0 */

	andi.	r10,r12,MSR_RI
	beq-	unrecov_slb

	mtcrf	0x80,r9			/* restore CR */

	mfmsr	r10
	clrrdi	r10,r10,2	/* clear the two low MSR bits (incl. RI) */
	mtmsrd	r10,1

	mtspr	SRR0,r11
	mtspr	SRR1,r12
	ld	r9,PACA_EXSLB+EX_R9(r13)
	ld	r10,PACA_EXSLB+EX_R10(r13)
	ld	r11,PACA_EXSLB+EX_R11(r13)
	ld	r12,PACA_EXSLB+EX_R12(r13)
	ld	r13,PACA_EXSLB+EX_R13(r13)	/* r13 last: it is the base */
	rfid
	b	.	/* prevent speculative execution */

/*
 * r13 points to the PACA, r9 contains the saved CR,
 * r12 contains the saved SRR1.
 * r3 has the faulting address
 * r9 - r13 are saved in paca->exslb.
 * r3 is saved in the same area, at PACA_EXSLB+EX_R3.
 * We assume we aren't going to take any exceptions during this procedure.
 *
 * None of the entry stubs in this file load SRR0 into r11 before
 * branching here.  On iSeries SRR0 is reloaded from the lppaca after
 * .slb_allocate returns; otherwise the SRR0/SRR1 SPRs are not written
 * by this routine and the final rfid uses them as they stand.
 *
 * LR is parked in PACA_EXSLB+EX_LR around the call because bl
 * overwrites it.
 */
_GLOBAL(do_slb_miss)
	mflr	r10

	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */

	bl	.slb_allocate			/* handle it */

	/* All done -- return from exception. */

	ld	r10,PACA_EXSLB+EX_LR(r13)
	ld	r3,PACA_EXSLB+EX_R3(r13)
	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
#ifdef CONFIG_PPC_ISERIES
	ld	r11,PACALPPACA+LPPACASRR0(r13)	/* get SRR0 value */
#endif /* CONFIG_PPC_ISERIES */

	mtlr	r10

	andi.	r10,r12,MSR_RI	/* check for unrecoverable exception */
	beq-	unrecov_slb

.machine	push
.machine	"power4"
	mtcrf	0x80,r9
	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
.machine	pop

#ifdef CONFIG_PPC_ISERIES
	mtspr	SRR0,r11
	mtspr	SRR1,r12
#endif /* CONFIG_PPC_ISERIES */
	ld	r9,PACA_EXSLB+EX_R9(r13)
	ld	r10,PACA_EXSLB+EX_R10(r13)
	ld	r11,PACA_EXSLB+EX_R11(r13)
	ld	r12,PACA_EXSLB+EX_R12(r13)
	ld	r13,PACA_EXSLB+EX_R13(r13)	/* r13 last: it is the base */
	rfid
	b	.	/* prevent speculative execution */

/*
 * Reached from the SLB miss return paths when MSR_RI is clear in the
 * saved SRR1 (r12).  Builds a common exception frame from the exslb save
 * area, saves the non-volatile GPRs and reports the exception as
 * unrecoverable.  r3 = r1 + STACK_FRAME_OVERHEAD is the argument handed
 * to unrecoverable_exception; if that call ever returns it is simply
 * invoked again.
 */
unrecov_slb:
	EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
	DISABLE_INTS
	bl	.save_nvgprs
1:	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	.unrecoverable_exception
	b	1b


/*
 * On pSeries, secondary processors spin in the following code.
 * At entry, r3 = this processor's number (physical cpu id)
 *
 * The physical id is moved to r24 straight away.  Once the matching paca
 * has been found, r13 and SPRG3 hold the paca address and r24 is replaced
 * by the logical cpu id.  The cpu then polls PACAPROCSTART in its own
 * paca and branches to __secondary_start when the byte becomes non-zero.
 */
_GLOBAL(pSeries_secondary_smp_init)
	mr	r24,r3
	
	/* turn on 64-bit mode */
	bl	.enable_64b_mode
	isync

	/* Copy some CPU settings from CPU 0 */
	bl	.__restore_cpu_setup

	/* Set up a paca value for this processor. Since we have the
	 * physical cpu id in r24, we need to search the pacas to find
	 * which logical id maps to our physical one.
	 */
	LOADADDR(r13, paca) 		/* Get base vaddr of paca array	 */
	li	r5,0			/* logical cpu id                */
1:	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
	cmpw	r6,r24			/* Compare to our id             */
	beq	2f
	addi	r13,r13,PACA_SIZE	/* Loop to next PACA on miss     */
	addi	r5,r5,1
	cmpwi	r5,NR_CPUS
	blt	1b

	/* All NR_CPUS pacas checked without a match: park here forever. */
99:	HMT_LOW				/* Couldn't find our CPU id      */
	b	99b

2:	mtspr	SPRG3,r13		/* Save vaddr of paca in SPRG3	 */
	/* From now on, r24 is expected to be the logical cpuid */
	mr	r24,r5
3:	HMT_LOW
	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
					/* start.			 */
	sync

	/* Create a temp kernel stack for use before relocation is on.	*/
	/* (This sits inside the poll loop, so r1 is recomputed on every pass.) */
	ld	r1,PACAEMERGSP(r13)
	subi	r1,r1,STACK_FRAME_OVERHEAD

	cmpwi	0,r23,0
	/* Without CONFIG_SMP and SECONDARY_PROCESSORS there is no exit from
	 * this loop: the compare result is ignored and the cpu keeps spinning.
	 */
#ifdef CONFIG_SMP
#ifdef SECONDARY_PROCESSORS
	bne	.__secondary_start
#endif
#endif
	b 	3b			/* Loop until told to go	 */

#ifdef CONFIG_PPC_ISERIES
/*
 * iSeries boot path: zero the BSS, set up the initial stack in
 * init_thread_union, identify the cpu, load the TOC pointer, point
 * systemcfg at SYSTEMCFG_VIRT_ADDR, run iSeries_early_setup and then
 * continue in start_here_common.
 */
_STATIC(__start_initialization_iSeries)
	/* Clear out the BSS */
	LOADADDR(r11,__bss_stop)
	LOADADDR(r8,__bss_start)
	sub	r11,r11,r8		/* bss size			*/
	addi	r11,r11,7		/* round up to an even double word */
	rldicl. r11,r11,61,3		/* shift right by 3		*/
	beq	4f			/* nothing to clear		*/
	addi	r8,r8,-8		/* bias for the stdu pre-increment */
	li	r0,0
	mtctr	r11			/* zero this many doublewords	*/
3:	stdu	r0,8(r8)
	bdnz	3b
4:
	/* Initial stack: top of init_thread_union with one frame whose
	 * back-chain word is zero.
	 */
	LOADADDR(r1,init_thread_union)
	addi	r1,r1,THREAD_SIZE
	li	r0,0
	stdu	r0,-STACK_FRAME_OVERHEAD(r1)

	/* r3 = cpu_specs, r4 = cur_cpu_spec, r5 = 0.
	 * NOTE(review): r5 looks like a relocation offset (the multiplatform
	 * path passes r26 here) -- confirm against identify_cpu.
	 */
	LOADADDR(r3,cpu_specs)
	LOADADDR(r4,cur_cpu_spec)
	li	r5,0
	bl	.identify_cpu

	/* r2 = __toc_start + 0x8000.  Two addi's are needed because 0x8000
	 * does not fit in addi's signed 16-bit immediate.
	 */
	LOADADDR(r2,__toc_start)
	addi	r2,r2,0x4000
	addi	r2,r2,0x4000

	LOADADDR(r9,systemcfg)
	SET_REG_TO_CONST(r4, SYSTEMCFG_VIRT_ADDR)
	std	r4,0(r9)		/* set the systemcfg pointer */

	bl	.iSeries_early_setup

	/* relocation is on at this point */

	b	.start_here_common
#endif /* CONFIG_PPC_ISERIES */

#ifdef CONFIG_PPC_MULTIPLATFORM

/*
 * __mmu_off: make sure instruction and data relocation are off.
 *
 * In:       r4 = address to continue at once MSR_IR/MSR_DR are cleared
 * Clobbers: r0, r3, cr0, SRR0, SRR1
 *
 * If both MSR_IR and MSR_DR are already clear this returns to the caller
 * through LR.  Otherwise it does NOT return: it clears whichever of the
 * two bits were set and rfid's to the address in r4.
 */
_STATIC(__mmu_off)
	mfmsr	r3
	andi.	r0,r3,MSR_IR|MSR_DR	/* r0 = translation bits that are on */
	beqlr				/* none set: nothing to do */
	andc	r3,r3,r0		/* MSR image with those bits cleared */
	mtspr	SPRN_SRR0,r4
	mtspr	SPRN_SRR1,r3
	sync
	rfid
	b	.	/* prevent speculative execution */


/*
 * Here is our main kernel entry point. We currently support 2 kinds of
 * entry, depending on the value of r5.
 *
 *   r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content
 *                 in r3...r7
 *   
 *   r5 == NULL -> kexec style entry. r3 is a physical pointer to the
 *                 DT block, r4 is a physical pointer to the kernel itself
 *
 * On the kexec-style path r3 and r4 are kept in r31 and r30, r24 is set
 * to 0 (cpu #), and execution continues at __after_prom_start.
 */
_GLOBAL(__start_initialization_multiplatform)
	/*
	 * Are we booted from a PROM Of-type client-interface ?
	 */
	cmpldi	cr0,r5,0
	bne	.__boot_from_prom		/* yes -> prom */

	/* Save parameters */
	mr	r31,r3
	mr	r30,r4

	/* Make sure we are running in 64 bits mode */
	bl	.enable_64b_mode

	/* Setup some critical 970 SPRs before switching MMU off */
	bl	.__970_cpu_preinit

	/* cpu # */
	li	r24,0

	/* Switch off MMU if not already */
	/* r4 = (offset of __after_prom_start from KERNELBASE) + r30, i.e. the
	 * address of __after_prom_start inside the image the caller passed in
	 * r4.  __mmu_off rfid's there if it has to clear MSR_IR/MSR_DR;
	 * if the MMU was already off it returns and the branch below is taken.
	 */
	LOADADDR(r4, .__after_prom_start - KERNELBASE)
	add	r4,r4,r30
	bl	.__mmu_off
	b	.__after_prom_start

/*
 * Open Firmware entry path.  The incoming r3..r7 are stashed in r31..r27
 * while 64-bit mode is enabled and a relocated TOC pointer is computed,
 * then restored unchanged and handed to prom_init.
 */
_STATIC(__boot_from_prom)
	/* Save parameters */
	mr	r31,r3
	mr	r30,r4
	mr	r29,r5
	mr	r28,r6
	mr	r27,r7

	/* Make sure we are running in 64 bits mode */
	bl	.enable_64b_mode

	/* put a relocation offset into r3 */
	bl	.reloc_offset

	/* r2 = __toc_start + 0x8000 (0x8000 does not fit a single addi) */
	LOADADDR(r2,__toc_start)
	addi	r2,r2,0x4000
	addi	r2,r2,0x4000

	/* Relocate the TOC from a virt addr to a real addr */
	sub	r2,r2,r3

	/* Restore parameters */
	mr	r3,r31
	mr	r4,r30
	mr	r5,r29
	mr	r6,r28
	mr	r7,r27

	/* Do all of the interaction with OF client interface */
	bl	.prom_init
	/* We never return */
	trap

/*
 * At this point, r3 contains the physical address we are running at,
 * returned by prom_init()
 * (that value is not used below: r3 is overwritten by reloc_offset).
 */
_STATIC(__after_prom_start)

/*
 * We need to run with __start at physical address 0.
 * This will leave some code in the first 256B of
 * real memory, which are reserved for software use.
 * The remainder of the first page is loaded with the fixed
 * interrupt vectors.  The next two pages are filled with
 * unknown exception placeholders.
 *
 * Note: This process overwrites the OF exception vectors.
 *	r26 == relocation offset
 *	r27 == KERNELBASE
 *
 * The copy is done in two steps: first everything up to copy_to_here
 * (which includes this code and copy_and_flush itself), then a jump into
 * the new copy, which copies the remainder up to klimit.
 */
	bl	.reloc_offset
	mr	r26,r3
	SET_REG_TO_CONST(r27,KERNELBASE)

	li	r3,0			/* target addr */

	// XXX FIXME: Use phys returned by OF (r30)
	sub	r4,r27,r26 		/* source addr			 */
					/* current address of _start	 */
					/*   i.e. where we are running	 */
					/*	the source addr		 */

	LOADADDR(r5,copy_to_here)	/* # bytes of memory to copy	 */
	sub	r5,r5,r27

	li	r6,0x100		/* Start offset, the first 0x100 */
					/* bytes were copied earlier.	 */

	bl	.copy_and_flush		/* copy the first n bytes	 */
					/* this includes the code being	 */
					/* executed here.		 */

	LOADADDR(r0, 4f)		/* Jump to the copy of this code */
	mtctr	r0			/* that we just made/relocated	 */
	bctr

	/* r3, r4 and r6 are carried over from the first copy_and_flush call,
	 * so this second call resumes at the offset where the first stopped.
	 * klimit is read through its pre-copy address (link address - r26).
	 */
4:	LOADADDR(r5,klimit)
	sub	r5,r5,r26
	ld	r5,0(r5)		/* get the value of klimit */
	sub	r5,r5,r27
	bl	.copy_and_flush		/* copy the rest */
	b	.start_here_multiplatform

#endif /* CONFIG_PPC_MULTIPLATFORM */

/*
 * Copy routine used to copy the kernel to start at physical address 0
 * and flush and invalidate the caches as needed.
 * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
 * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
 *
 * r5 and r6 are byte offsets applied to both r3 and r4.  Data is moved in
 * chunks of 16 doublewords (128 bytes); after each chunk one dcbst and
 * one icbi are issued for the address of the last doubleword stored.
 *
 * Note: this routine *only* clobbers r0, r6 and lr
 * (ctr and cr0 are modified as well)
 */
_GLOBAL(copy_and_flush)
	/* Bias both offsets by -8 because the inner loop increments r6
	 * before each access; the bias is removed again before returning.
	 */
	addi	r5,r5,-8
	addi	r6,r6,-8
4:	li	r0,16			/* Use the least common		*/
					/* denominator cache line	*/
					/* size.  This results in	*/
					/* extra cache line flushes	*/
					/* but operation is correct.	*/
					/* Can't get cache line size	*/
					/* from NACA as it is being	*/
					/* moved too.			*/
	/* NOTE(review): only one dcbst/icbi is issued per 128-byte chunk, so
	 * every byte is covered only if cache lines are at least 128 bytes
	 * and the chunks are line-aligned -- confirm for all supported cpus.
	 */

	mtctr	r0			/* put # doublewords/chunk in ctr */
3:	addi	r6,r6,8			/* copy a cache line		*/
	ldx	r0,r6,r4
	stdx	r0,r6,r3
	bdnz	3b
	dcbst	r6,r3			/* write it to memory		*/
	sync
	icbi	r6,r3			/* flush the icache line	*/
	cmpld	0,r6,r5
	blt	4b
	sync
	addi	r5,r5,8			/* undo the -8 bias		*/
	addi	r6,r6,8
	blr

/*
 * End marker for the first copy_and_flush pass in __after_prom_start:
 * everything from the start of the kernel image up to this label is
 * copied before jumping into the new copy.  For PowerPC, gas treats the
 * .align operand as a power of two, so the label is 256-byte aligned.
 */
.align 8
copy_to_here:

/*
 * load_up_fpu(unused, unused, tsk)
 * Disable FP for the task which had the FPU previously,
 * and save its floating-point registers in its thread_struct.
 * Enables the FPU for use in the kernel on return.
 * On SMP we know the fpu is free, since we give it up every
 * switch (ie, no lazy save of the FP registers).
 * On entry: r13 == paca (the current task is read from PACACURRENT(r13))
 *           && last_task_used_math != 'current'
 * r12 is used as the MSR image that is stored to _MSR(r1) for the return.
 * NOTE(review): r12 is presumably the saved SRR1 of the interrupted
 * context and r1 the exception frame -- confirm against the caller.
 * Exits through fast_exception_return rather than blr.
 */
_STATIC(load_up_fpu)
	mfmsr	r5			/* grab the current MSR */
	ori	r5,r5,MSR_FP
	mtmsrd	r5			/* enable use of fpu now */
	isync
/*
 * For SMP, we don't do lazy FPU switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_fpu in switch_to.
 *
 */
#ifndef CONFIG_SMP
	ld	r3,last_task_used_math@got(r2)
	ld	r4,0(r3)		/* r4 = last_task_used_math */
	cmpdi	0,r4,0
	beq	1f			/* no previous owner: nothing to save */
	/* Save FP state to last_task_used_math's THREAD struct */
	addi	r4,r4,THREAD
	SAVE_32FPRS(0, r4)
	mffs	fr0
	stfd	fr0,THREAD_FPSCR(r4)
	/* Disable FP for last_task_used_math */
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	li	r6,MSR_FP|MSR_FE0|MSR_FE1
	andc	r4,r4,r6
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	/* enable use of FP after return */
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	ld	r4,THREAD_FPEXC_MODE(r5)
	ori	r12,r12,MSR_FP
	or	r12,r12,r4		/* merge in the task's fpexc_mode bits */
	std	r12,_MSR(r1)
	lfd	fr0,THREAD_FPSCR(r5)
	mtfsf	0xff,fr0
	REST_32FPRS(0, r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_math to 'current' */
	/* (r3 still holds the address of last_task_used_math from above) */
	subi	r4,r5,THREAD		/* Back to 'current' */
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	b	fast_exception_return

/*
 * disable_kernel_fp()
 * Clear MSR_FP in the current MSR so that floating point is no longer
 * available.  Clobbers r0; r3 holds the new MSR value on return.
 */
_GLOBAL(disable_kernel_fp)
	li	r0,MSR_FP		/* mask of the bit to drop */
	mfmsr	r3
	andc	r3,r3,r0		/* r3 = MSR with MSR_FP cleared */
	mtmsrd	r3			/* FPU is off from here on */
	isync
	blr

/*
 * giveup_fpu(tsk)
 * Disable FP for the task given as the argument,
 * and save the floating-point registers in its thread_struct.
 * Enables the FPU for use in the kernel on return.
 *
 * In:       r3 = tsk (may be NULL, in which case only MSR_FP is enabled)
 * Clobbers: r3, r4, r5, fr0, cr0
 */
_GLOBAL(giveup_fpu)
	mfmsr	r5
	ori	r5,r5,MSR_FP
	mtmsrd	r5			/* enable use of fpu now */
	isync
	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	/* The result of this compare is consumed by the beq after the state
	 * save; it relies on SAVE_32FPRS/mffs/stfd leaving cr0 untouched.
	 */
	cmpdi	0,r5,0
	SAVE_32FPRS(0, r3)
	mffs	fr0
	stfd	fr0,THREAD_FPSCR(r3)
	beq	1f			/* no pt_regs: no saved MSR to patch */
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	li	r3,MSR_FP|MSR_FE0|MSR_FE1
	andc	r4,r4,r3		/* disable FP for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	/* Nobody owns the FPU any more: last_task_used_math = NULL */
	li	r5,0
	ld	r4,last_task_used_math@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr


#ifdef CONFIG_ALTIVEC
		
/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 * On entry: r13 == paca (the current task is read from PACACURRENT(r13))
 *           && last_task_used_altivec != 'current'
 * r12 is used as the MSR image that is stored to _MSR(r1) for the return.
 * NOTE(review): r12 is presumably the saved SRR1 of the interrupted
 * context and r1 the exception frame -- confirm against the caller.
 * Exits through fast_exception_return rather than blr.
 */
_STATIC(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	mtmsrd	r5			/* enable use of VMX now */
	isync
	
/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code. Note that we could rely on vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
	ld	r3,last_task_used_altivec@got(r2)
	ld	r4,0(r3)		/* r4 = last_task_used_altivec */
	cmpdi	0,r4,0
	beq	1f			/* no previous owner: nothing to save */
	/* Save VMX state to last_task_used_altivec's THREAD struct */
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VEC@h
	andc	r4,r4,r6
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpdi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)	/* thread.used_vr = 1 */
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	/* (r3 still holds the address of last_task_used_altivec from above) */
	subi	r4,r5,THREAD		/* Back to 'current' */
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	b	fast_exception_return

/*
 * disable_kernel_altivec()
 * Clear MSR_VEC in the current MSR so that VMX is no longer available.
 * Clobbers r0; r3 holds the new MSR value on return.
 */
_GLOBAL(disable_kernel_altivec)
	lis	r0,MSR_VEC@h		/* mask of the bit to drop */
	mfmsr	r3
	andc	r3,r3,r0		/* r3 = MSR with MSR_VEC cleared */
	mtmsrd	r3			/* VMX is off from here on */
	isync
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 *
 * In:       r3 = tsk (may be NULL, in which case only MSR_VEC is enabled)
 * Clobbers: r3, r4, r5, vr0, cr0
 */
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	mtmsrd	r5			/* enable use of VMX now */
	isync
	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	/* The result of this compare is consumed by the beq after the state
	 * save; it relies on SAVE_32VRS/mfvscr/li/stvx leaving cr0 untouched.
	 */
	cmpdi	0,r5,0
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f			/* no pt_regs: no saved MSR to patch */
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VEC@h
	andc	r4,r4,r3		/* disable VMX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	/* Nobody owns the VMX unit any more: last_task_used_altivec = NULL */
	li	r5,0
	ld	r4,last_task_used_altivec@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_ALTIVEC */

#ifdef CONFIG_SMP
#ifdef CONFIG_PPC_PMAC
/*
 * On PowerMac, secondary processors start from the reset vector, which
 * is temporarily turned into a call to one of the functions below.
 *
 * Each stub only loads its fixed cpu number (1, 2 or 3) into r24 and
 * branches to the shared pmac_secondary_start code.
 */
	.section ".text";
	.align 2 ;

	.globl	pmac_secondary_start_1	
pmac_secondary_start_1:	
	li	r24, 1
	b	.pmac_secondary_start
	
	.globl pmac_secondary_start_2
pmac_secondary_start_2:	
	li	r24, 2
	b	.pmac_secondary_start
	
	.globl pmac_secondary_start_3
pmac_secondary_start_3:
	li	r24, 3
	b	.pmac_secondary_start
	
/*
 * Common PowerMac secondary entry.
 * In: r24 = cpu number, set by one of the pmac_secondary_start_N stubs.
 * Sets up r13/SPRG3 (paca) and a temporary stack in r1, then continues
 * in __secondary_start.
 */
_GLOBAL(pmac_secondary_start)
	/* turn on 64-bit mode */
	bl	.enable_64b_mode
	isync

	/* Copy some CPU settings from CPU 0 */
	bl	.__restore_cpu_setup

	/* pSeries do that early though I don't think we really need it */
	mfmsr	r3
	ori	r3,r3,MSR_RI
	mtmsrd	r3			/* RI on */

	/* Set up a paca value for this processor. */
	/* r13 = paca + r24 * PACA_SIZE: r24 indexes the paca array directly */
	LOADADDR(r4, paca) 		 /* Get base vaddr of paca array	*/
	mulli	r13,r24,PACA_SIZE	 /* Calculate vaddr of right paca */
	add	r13,r13,r4		/* for this processor.		*/
	mtspr	SPRG3,r13		 /* Save vaddr of paca in SPRG3	*/

	/* Create a temp kernel stack for use before relocation is on.	*/
	ld	r1,PACAEMERGSP(r13)
	subi	r1,r1,STACK_FRAME_OVERHEAD

	b	.__secondary_start

#endif /* CONFIG_PPC_PMAC */

/*
 * This function is called after the master CPU has released the
 * secondary processors.  The execution environment is relocation off.
 * The paca for this processor has the following fields initialized at
 * this point:
 *   1. Processor number
 *   2. Segment table pointer (virtual address)
 * On entry the following are set:
 *   r1	= stack pointer.  vaddr for iSeries, raddr (temp stack) for pSeries
 *   r24   = cpu# (in Linux terms)
 *   r13   = paca virtual address
 *   SPRG3 = paca virtual address
 *
 * Leaves via rfid to start_secondary_prolog with MSR_KERNEL (plus MSR_EE
 * when DO_SOFT_DISABLE is defined); it does not return to its caller.
 */
_GLOBAL(__secondary_start)

	HMT_MEDIUM			/* Set thread priority to MEDIUM */

	ld	r2,PACATOC(r13)
	li	r6,0
	stb	r6,PACAPROCENABLED(r13)	/* paca proc-enabled flag = 0 */

#ifndef CONFIG_PPC_ISERIES
	/* Initialize the page table pointer register. */
	LOADADDR(r6,_SDR1)
	ld	r6,0(r6)		/* get the value of _SDR1	 */
	mtspr	SDR1,r6			/* set the htab location	 */
#endif
	/* Initialize the first segment table (or SLB) entry		 */
	ld	r3,PACASTABVIRT(r13)	/* get addr of segment table	 */
	bl	.stab_initialize

	/* Initialize the kernel stack.  Just a repeat for iSeries.	 */
	LOADADDR(r3,current_set)
	sldi	r28,r24,3		/* get current_set[cpu#]	 */
	ldx	r1,r3,r28
	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
	std	r1,PACAKSAVE(r13)

	/* r4 = real address of the segment table with the valid bit set;
	 * it is what both the hypervisor calls and the mtasr below consume.
	 */
	ld	r3,PACASTABREAL(r13)	/* get raddr of segment table	 */
	ori	r4,r3,1			/* turn on valid bit		 */

#ifdef CONFIG_PPC_ISERIES
	li	r0,-1			/* hypervisor call */
	li	r3,1
	sldi	r3,r3,63		/* 0x8000000000000000 */
	ori	r3,r3,4			/* 0x8000000000000004 */
	sc				/* HvCall_setASR */
#else
	/* set the ASR */
	/* The hcall is used only on a pSeries LPAR running on one of the three
	 * PVR versions tested below; everything else writes the ASR directly.
	 */
	li	r3,SYSTEMCFG_PHYS_ADDR	/* r3 = ptr to systemcfg	 */
	lwz	r3,PLATFORM(r3)		/* r3 = platform flags		 */
	cmpldi 	r3,PLATFORM_PSERIES_LPAR
	bne	98f
	mfspr	r3,PVR
	srwi	r3,r3,16
	cmpwi	r3,0x37			/* SStar  */
	beq	97f
	cmpwi	r3,0x36			/* IStar  */
	beq	97f
	cmpwi	r3,0x34			/* Pulsar */
	bne	98f
97:	li	r3,H_SET_ASR		/* hcall = H_SET_ASR */
	HVSC				/* Invoking hcall */
	b	99f
98:					/* !(rpa hypervisor) || !(star)  */
	mtasr	r4			/* set the stab location	 */
99:
#endif
	/* LR is zeroed before entering start_secondary_prolog.
	 * NOTE(review): presumably so nothing can return through a stale
	 * link address -- confirm.
	 */
	li	r7,0
	mtlr	r7

	/* enable MMU and jump to start_secondary */
	LOADADDR(r3,.start_secondary_prolog)
	SET_REG_TO_CONST(r4, MSR_KERNEL)
#ifdef DO_SOFT_DISABLE
	ori	r4,r4,MSR_EE
#endif
	mtspr	SRR0,r3
	mtspr	SRR1,r4
	rfid
	b	.	/* prevent speculative execution */

/*
 * Running with relocation on at this point.  All we want to do is
 * zero the stack back-chain pointer before going into C code.
 *
 * NOTE(review): nothing follows the bl (no trap or loop), so
 * start_secondary must never return.
 */
_GLOBAL(start_secondary_prolog)
	li	r3,0
	std	r3,0(r1)		/* Zero the stack frame pointer	*/
	bl	.start_secondary
#endif

/*
 * enable_64b_mode: set the MSR bits at positions MSR_SF_LG and
 * MSR_ISF_LG in the current MSR, leaving all other bits as they were.
 *
 * This subroutine clobbers r11 and r12
 */
_GLOBAL(enable_64b_mode)
	mfmsr	r11			/* grab the current MSR */
	li	r12,1
	rldicr	r12,r12,MSR_SF_LG,(63-MSR_SF_LG)	/* r12 = 1 << MSR_SF_LG */
	or	r11,r11,r12
	li	r12,1
	rldicr	r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)	/* r12 = 1 << MSR_ISF_LG */
	or	r11,r11,r12
	mtmsrd	r11
	isync
	blr

#ifdef CONFIG_PPC_MULTIPLATFORM
/*
 * This is where the main kernel code starts.
 *
 * Reached from __after_prom_start once the kernel image has been copied.
 * Translation is still off here, so every link-time address is adjusted
 * by the relocation offset kept in r26.  The code clears the BSS, sets
 * up a stack and TOC, identifies the cpu, runs early_setup, programs the
 * ASR and SDR1 and finally rfid's to start_here_common with MSR_KERNEL.
 * r31 (saved by the entry code) is passed to early_setup in r3.
 */
_STATIC(start_here_multiplatform)
	/* get a new offset, now that the kernel has moved. */
	bl	.reloc_offset
	mr	r26,r3

	/* Clear out the BSS. It may have been done in prom_init,
	 * already but that's irrelevant since prom_init will soon
	 * be detached from the kernel completely. Besides, we need
	 * to clear it now for kexec-style entry.
	 */
	LOADADDR(r11,__bss_stop)
	LOADADDR(r8,__bss_start)
	sub	r11,r11,r8		/* bss size			*/
	addi	r11,r11,7		/* round up to an even double word */
	rldicl. r11,r11,61,3		/* shift right by 3		*/
	beq	4f			/* nothing to clear		*/
	addi	r8,r8,-8		/* bias for the stdu pre-increment */
	li	r0,0
	mtctr	r11			/* zero this many doublewords	*/
3:	stdu	r0,8(r8)
	bdnz	3b
4:

	mfmsr	r6
	ori	r6,r6,MSR_RI
	mtmsrd	r6			/* RI on */

	/* setup the systemcfg pointer which is needed by *tab_initialize	*/
	LOADADDR(r6,systemcfg)
	sub	r6,r6,r26		/* addr of the variable systemcfg */
	li	r27,SYSTEMCFG_PHYS_ADDR
	std	r27,0(r6)	 	/* set the value of systemcfg	*/

#ifdef CONFIG_HMT
	/* Start up the second thread on cpu 0 */
	mfspr	r3,PVR
	srwi	r3,r3,16
	cmpwi	r3,0x34			/* Pulsar  */
	beq	90f
	cmpwi	r3,0x36			/* Icestar */
	beq	90f
	cmpwi	r3,0x37			/* SStar   */
	beq	90f
	b	91f			/* HMT not supported */
90:	li	r3,0
	bl	.hmt_start_secondary
91:
#endif

	/* The following gets the stack and TOC set up with the regs */
	/* pointing to the real addr of the kernel stack.  This is   */
	/* all done to support the C function call below which sets  */
	/* up the htab.  This is done because we have relocated the  */
	/* kernel but are still running in real mode. */

	LOADADDR(r3,init_thread_union)
	sub	r3,r3,r26

	/* set up a stack pointer (physical address) */
	addi	r1,r3,THREAD_SIZE
	li	r0,0
	stdu	r0,-STACK_FRAME_OVERHEAD(r1)

	/* set up the TOC (physical address) */
	/* r2 = __toc_start + 0x8000 - r26 (0x8000 does not fit one addi) */
	LOADADDR(r2,__toc_start)
	addi	r2,r2,0x4000
	addi	r2,r2,0x4000
	sub	r2,r2,r26

	/* r3 = &cpu_specs, r4 = &cur_cpu_spec (both relocated), r5 = offset */
	LOADADDR(r3,cpu_specs)
	sub	r3,r3,r26
	LOADADDR(r4,cur_cpu_spec)
	sub	r4,r4,r26
	mr	r5,r26
	bl	.identify_cpu

	/* Save some low level config HIDs of CPU0 to be copied to
	 * other CPUs later on, or used for suspend/resume
	 */
	bl	.__save_cpu_setup
	sync

	/* Setup a valid physical PACA pointer in SPRG3 for early_setup
	 * note that boot_cpuid can always be 0 nowadays since there is
	 * nowhere it can be initialized differently before we reach this
	 * code
	 */
	LOADADDR(r27, boot_cpuid)
	sub	r27,r27,r26
	lwz	r27,0(r27)

	LOADADDR(r24, paca) 		/* Get base vaddr of paca array	 */
	mulli	r13,r27,PACA_SIZE	/* Calculate vaddr of right paca */
	add	r13,r13,r24		/* for this processor.		 */
	sub	r13,r13,r26		/* convert to physical addr	 */
	mtspr	SPRG3,r13		/* PPPBBB: Temp... -Peter */
	
	/* Do very early kernel initializations, including initial hash table,
	 * stab and slb setup before we turn on relocation.	*/

	/* Restore parameters passed from prom_init/kexec */
	mr	r3,r31
 	bl	.early_setup

	/* set the ASR */
	/* r4 = real address of the segment table with the valid bit set.
	 * The hcall is used only on a pSeries LPAR running on one of the three
	 * PVR versions tested below; everything else writes the ASR directly.
	 */
	ld	r3,PACASTABREAL(r13)
	ori	r4,r3,1			/* turn on valid bit		 */
	li	r3,SYSTEMCFG_PHYS_ADDR	/* r3 = ptr to systemcfg */
	lwz	r3,PLATFORM(r3)		/* r3 = platform flags */
	cmpldi 	r3,PLATFORM_PSERIES_LPAR
	bne	98f
	mfspr	r3,PVR
	srwi	r3,r3,16
	cmpwi	r3,0x37			/* SStar */
	beq	97f
	cmpwi	r3,0x36			/* IStar  */
	beq	97f
	cmpwi	r3,0x34			/* Pulsar */
	bne	98f
97:	li	r3,H_SET_ASR		/* hcall = H_SET_ASR */
	HVSC				/* Invoking hcall */
	b	99f
98:					/* !(rpa hypervisor) || !(star) */
	mtasr	r4			/* set the stab location	*/
99:
	/* Set SDR1 (hash table pointer) */
	li	r3,SYSTEMCFG_PHYS_ADDR	/* r3 = ptr to systemcfg */
	lwz	r3,PLATFORM(r3)		/* r3 = platform flags */
	/* Test if bit 0 is set (LPAR bit) */
	andi.	r3,r3,0x1
	bne	98f
	LOADADDR(r6,_SDR1)		/* Only if NOT LPAR */
	sub	r6,r6,r26
	ld	r6,0(r6)		/* get the value of _SDR1 */
	mtspr	SDR1,r6			/* set the htab location  */
98: 
	/* Continue at the link-time address of start_here_common with
	 * MSR_KERNEL loaded into the MSR by the rfid.
	 */
	LOADADDR(r3,.start_here_common)
	SET_REG_TO_CONST(r4, MSR_KERNEL)
	mtspr	SRR0,r3
	mtspr	SRR1,r4
	rfid
	b	.	/* prevent speculative execution */
#endif /* CONFIG_PPC_MULTIPLATFORM */
	
	/* This is where all platforms converge execution */
	/*
	 * Sets up the boot stack, applies cpu feature fixups, points
	 * systemcfg at its virtual address, initialises r13/SPRG3 with the
	 * boot cpu's paca, sets paca->current to init_task, loads the TOC,
	 * then calls setup_system and start_kernel.
	 *
	 * NOTE(review): nothing follows the final bl (no trap or loop), so
	 * start_kernel must never return.
	 */
_STATIC(start_here_common)
	/* relocation is on at this point */

	/* The following code sets up the SP and TOC now that we are */
	/* running with translation enabled. */

	LOADADDR(r3,init_thread_union)

	/* set up the stack */
	addi	r1,r3,THREAD_SIZE
	li	r0,0
	stdu	r0,-STACK_FRAME_OVERHEAD(r1)

	/* Apply the CPU-specific fixups (nop out sections not relevant
	 * to this CPU)
	 */
	li	r3,0
	bl	.do_cpu_ftr_fixups

	/* setup the systemcfg pointer */
	LOADADDR(r9,systemcfg)
	SET_REG_TO_CONST(r8, SYSTEMCFG_VIRT_ADDR)
	std	r8,0(r9)

	LOADADDR(r26, boot_cpuid)
	lwz	r26,0(r26)

	/* r13 = paca + boot_cpuid * PACA_SIZE */
	LOADADDR(r24, paca) 		/* Get base vaddr of paca array  */
	mulli	r13,r26,PACA_SIZE	/* Calculate vaddr of right paca */
	add	r13,r13,r24		/* for this processor.		 */
	mtspr	SPRG3,r13

	/* ptr to current */
	LOADADDR(r4,init_task)
	std	r4,PACACURRENT(r13)

	/* Load the TOC */
	ld	r2,PACATOC(r13)
	std	r1,PACAKSAVE(r13)

	bl	.setup_system

	/* Load up the kernel context */
	/* (local label 5 has no reference within this function) */
5:
#ifdef DO_SOFT_DISABLE
	li	r5,0
	stb	r5,PACAPROCENABLED(r13)	/* Soft Disabled */
	mfmsr	r5
	ori	r5,r5,MSR_EE		/* Hard Enabled */
	mtmsrd	r5
#endif

	bl .start_kernel

/* __setup_cpu_power3: empty routine, returns immediately. */
_GLOBAL(__setup_cpu_power3)
	blr

/*
 * hmt_init: entry that ends by branching to pSeries_secondary_smp_init
 * with r3 = r24.
 *
 * With CONFIG_HMT, on PVR versions 0x34/0x36/0x37, it first records this
 * thread's masked PIR in hmt_thread_data[r24] (8-byte entries, PIR value
 * in the first word) and calls hmt_start_secondary.
 *
 * __hmt_secondary_hold is the address hmt_start_secondary loads into
 * NIADORM.  It searches hmt_thread_data for an entry whose first word
 * equals its own masked PIR and loads r24 from the second word of that
 * entry before joining the common exit.
 *
 * NOTE(review): for PVR 0x36/0x37 the PIR is masked with 0x3ff when it is
 * stored but with 0x3f when it is looked up, and the lookup loop has no
 * upper bound -- confirm that the masks are intentionally different.
 */
_GLOBAL(hmt_init)
#ifdef CONFIG_HMT
	LOADADDR(r5, hmt_thread_data)
	mfspr	r7,PVR
	srwi	r7,r7,16
	cmpwi	r7,0x34			/* Pulsar  */
	beq	90f
	cmpwi	r7,0x36			/* Icestar */
	beq	91f
	cmpwi	r7,0x37			/* SStar   */
	beq	91f
	b	101f			/* any other cpu: skip the HMT setup */
90:	mfspr	r6,PIR
	andi.	r6,r6,0x1f
	b	92f
91:	mfspr	r6,PIR
	andi.	r6,r6,0x3ff
92:	sldi	r4,r24,3		/* byte offset of entry r24 */
	stwx	r6,r5,r4		/* first word of entry = masked PIR */
	bl	.hmt_start_secondary
	b	101f

__hmt_secondary_hold:
	LOADADDR(r5, hmt_thread_data)
	clrldi	r5,r5,4			/* drop the top 4 address bits */
	li	r7,0			/* r7 = byte offset into the table */
	mfspr	r6,PIR
	mfspr	r8,PVR
	srwi	r8,r8,16
	cmpwi	r8,0x34
	bne	93f
	andi.	r6,r6,0x1f
	b	103f
93:	andi.	r6,r6,0x3f

	/* Walk the table until the first word of an entry matches r6 */
103:	lwzx	r8,r5,r7
	cmpw	r8,r6
	beq	104f
	addi	r7,r7,8
	b	103b

104:	addi	r7,r7,4			/* second word of the matching entry */
	lwzx	r9,r5,r7
	mr	r24,r9
101:
#endif
	mr	r3,r24
	b	.pSeries_secondary_smp_init

#ifdef CONFIG_HMT
/*
 * hmt_start_secondary: program the HMT-related SPRs.
 *
 *   NIADORM = address of __hmt_secondary_hold with the top 4 bits cleared
 *   MSRDORM = MSRDORM with bit 0x40 cleared (the mask is -65 == ~0x40)
 *   TSC     = r4 after lis 0xffef / ori 0x7403
 *   TST     = 0x1f4
 *   HID0    = HID0 | 0x1
 *   CTRLT   = CTRLF | 0x00400000
 *
 * NOTE(review): the meaning of the individual constants is not visible
 * in this file -- check the processor documentation before changing them.
 * Clobbers r4 and r5.
 */
_GLOBAL(hmt_start_secondary)
	LOADADDR(r4,__hmt_secondary_hold)
	clrldi	r4,r4,4
	mtspr	NIADORM, r4
	mfspr	r4, MSRDORM
	li	r5, -65
	and	r4, r4, r5
	mtspr	MSRDORM, r4
	lis	r4,0xffef
	ori	r4,r4,0x7403
	mtspr	TSC, r4
	li	r4,0x1f4
	mtspr	TST, r4
	mfspr	r4, HID0
	ori	r4, r4, 0x1
	mtspr	HID0, r4
	mfspr	r4, CTRLF
	oris	r4, r4, 0x40
	mtspr	CTRLT, r4
	blr
#endif

#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES)
/*
 * smp_release_cpus: let the secondary cpus leave the shared hold loop.
 *
 * Every secondary initially spins on __secondary_hold_spinloop.  Storing
 * 1 there releases all of them at once; each one then goes on to spin on
 * the start flag in its own paca.  On non-SMP kernels this routine is
 * not built and the secondaries never leave the shared loop.
 *
 * Clobbers r3 (left holding 1) and r5.
 */
_GLOBAL(smp_release_cpus)
	LOADADDR(r5,__secondary_hold_spinloop)	/* r5 = &spinloop word */
	li	r3,1				/* release value */
	std	r3,0(r5)
	sync					/* order the store before returning */
	blr
#endif /* CONFIG_SMP && !CONFIG_PPC_ISERIES */


/*
 * We put a few things here that have to be page-aligned.
 * This stuff goes at the beginning of the data segment,
 * which is page-aligned.
 *
 * Each of the three tables below is exactly one 4096-byte page, so with
 * the .align 12 at the top they all start on a page boundary.
 */
	.data
	.align	12
	.globl	sdata
sdata:
	.globl	empty_zero_page
empty_zero_page:
	.space	4096

	.globl	swapper_pg_dir
swapper_pg_dir:
	.space	4096

	.globl	ioremap_dir
ioremap_dir:
	.space	4096

#ifdef CONFIG_SMP
/* 1 page segment table per cpu (max 48, cpu0 allocated at STAB0_PHYS_ADDR) */
/* NOTE(review): the 48 is hard-coded rather than derived from NR_CPUS --
 * confirm that NR_CPUS can never exceed it.
 */
	.globl	stab_array
stab_array:
	.space	4096 * 48
#endif
	
/*
 * This space gets a copy of optional info passed to us by the bootstrap
 * Used to pass parameters into the kernel like root=/dev/sda1, etc.
 */
	.globl	cmd_line
cmd_line:
	.space	COMMAND_LINE_SIZE
